"""
演示通过pyspark代码加载数据,即数据的输入
"""
from pyspark import SparkContext,SparkConf

conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

# parallelize() ships a local Python collection into Spark, producing an RDD.
# Each example below shows how a different Python container is distributed.
rdd1 = sc.parallelize([1, 2, 3, 4, 5])            # list  -> elements 1..5
rdd2 = sc.parallelize((1, 2, 3, 4, 5))            # tuple -> elements 1..5
rdd3 = sc.parallelize("abcdefg")                  # str   -> one element per character
rdd4 = sc.parallelize({"key1": "value1", "key2": "value2"})  # dict -> iterates keys only (values are dropped)
rdd5 = sc.parallelize({1, 2, 3, 4, 5})            # set   -> elements, order not guaranteed

# To inspect an RDD's contents, bring it back to the driver with collect().
# Uncomment any of the following to print the corresponding RDD:
# print(rdd1.collect())
# print(rdd2.collect())
# print(rdd3.collect())
# print(rdd4.collect())
# print(rdd5.collect())


# textFile() reads an external file into Spark as an RDD (one element per line).
# NOTE(review): the path is hard-coded — D:/hello.txt must exist for this to run.
rdd = sc.textFile("D:/hello.txt")
lines = rdd.collect()
print(lines)

# Shut down the SparkContext and release its resources.
sc.stop()

